#importing the libraries
library(scatterplot3d) # part(a)
library(tidyverse) #filter etc.
library(ggplot2) #plot in reduced dimensions
library(reshape) #melt

library(dplyr)
library(DT) #datatable

Introduction:

Given the uWaveGestureLibrary, consisting of over 4000 instances from 8 people with 8 different gestures, our aim is to visualize the gestures. Then, we will try to apply dimensionality reduction. Each instance is recorded along three axes (x, y and z), and the same 8 gesture classes apply to each axis.

Tasks:

A. Read the Data and Plot an Instance from Each Gesture

# Load the training sets for the three accelerometer axes straight from the
# course repository. The matching test-set reads are kept below, commented
# out, because only the training data is used here.
x_train <- read.table(
  file = "https://github.com/BU-IE-582/fall20-ilaydacelenk/blob/master/files/HW2_data/uWaveGestureLibrary_X_TRAIN.txt?raw=true"
)
y_train <- read.table(
  file = "https://github.com/BU-IE-582/fall20-ilaydacelenk/blob/master/files/HW2_data/uWaveGestureLibrary_Y_TRAIN.txt?raw=true"
)
z_train <- read.table(
  file = "https://github.com/BU-IE-582/fall20-ilaydacelenk/blob/master/files/HW2_data/uWaveGestureLibrary_Z_TRAIN.txt?raw=true"
)
# x_test <- read.table("https://github.com/BU-IE-582/fall20-ilaydacelenk/blob/master/files/HW2_data/uWaveGestureLibrary_X_TEST.txt?raw=true")
# y_test <- read.table("https://github.com/BU-IE-582/fall20-ilaydacelenk/blob/master/files/HW2_data/uWaveGestureLibrary_Y_TEST.txt?raw=true")
# z_test <- read.table("https://github.com/BU-IE-582/fall20-ilaydacelenk/blob/master/files/HW2_data/uWaveGestureLibrary_Z_TEST.txt?raw=true")

For the x-axis, let’s find the indices where we see each gesture for the first time. It is enough to look at x_train since y_train and z_train will have the same indices for the first occurrences of each gesture.

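In order to find the velocity vector, we need to compute the cumulative sum of acceleration over time; a second cumulative sum, this time of the velocity, then gives the position. Since we have the indices, it is easy to pick out these cumulative sums for one instance of each gesture.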

# Row index of the first instance of each gesture class 1..8 in x_train.
# match() is vectorised over its first argument and returns the position of
# the first match for every class (NA if a class never occurs), so no loop or
# incremental append is needed.
index_vector <- match(1:8, x_train$V1)

# Integrate acceleration twice (unit time step) to obtain positions:
#   velocity(t) = cumsum(acceleration) * change in t
#   position(t) = cumsum(velocity)     * change in t
#
# acc_matrix: data frame with one row per instance. Column `V1` holds the
#   gesture class label; every other column holds one acceleration reading.
# Returns a data frame with the same column names in which `V1` still holds
#   the class labels and the remaining columns hold the running position.
loc_matrix <- function(acc_matrix) {
  stopifnot("V1" %in% names(acc_matrix))
  if (nrow(acc_matrix) == 0) {
    # Nothing to integrate; hand the empty frame back unchanged.
    return(acc_matrix)
  }

  order_of_instances <- acc_matrix$V1
  # Neutralise the label column so it adds nothing to the running sums.
  acc_matrix$V1 <- 0

  # Convert explicitly instead of letting apply() coerce the data frame, and
  # integrate twice in a single pass over each row. apply() returns one column
  # per input row, hence the transpose.
  readings <- as.matrix(acc_matrix)
  loc <- t(apply(readings, 1, function(acc) cumsum(cumsum(acc))))

  loc <- as.data.frame(loc)
  loc$V1 <- order_of_instances
  loc
}
  
# Position of every training instance along each axis, stored as plain
# matrices (column 1 = class label, remaining columns = position over time).
x_position <- as.matrix(loc_matrix(x_train))
y_position <- as.matrix(loc_matrix(y_train))
z_position <- as.matrix(loc_matrix(z_train))

#par(mfrow=c(1,1))

# Draw one 3-D scatter plot of the position trajectory for each requested
# instance.
#
# indices: row numbers into x_position / y_position / z_position (read from
#   the enclosing environment); one plot is produced per entry. The argument
#   is now actually used rather than the global index_vector and a fixed 1:8.
# Returns NULL invisibly; called for its plotting side effect.
plot_3d <- function(indices) {
  for (row in indices) {
    scatterplot3d(
      x_position[row, -1],
      y_position[row, -1],
      z_position[row, -1],
      # Column 1 of the position matrices holds the gesture class label.
      main = paste("Gesture Class ", x_position[row, 1]),
      xlab = "X Axis",
      ylab = "Y Axis",
      zlab = "Z Axis",
      col.grid = "lightblue",
      type = "p",
      color = "red"
    )
  }
  invisible(NULL)
}

plot_3d(index_vector)

B. Dimensionality Reduction - PCA on the Whole Data

Here we are dealing with multivariate time series and we would like to reduce it to a univariate time series. In order to achieve this, we transform the data into the long format first.

Column V2 is time 1 and column V316 represents the time index 315.

# Reshape one axis' position matrix (column V1 = class label, V2.. = time
# steps) into long format: one row per (instance, time step).
#
# position:   numeric matrix with column names V1, V2, ...
# value_name: name for the measurement column in the result ("X", "Y", "Z").
# Returns a data frame with columns time_series_id, time_index (still the
#   original column name), <value_name>, class.
to_long_format <- function(position, value_name) {
  long <- position %>%
    as.data.frame() %>%
    # Number instances from the matrix actually being reshaped.
    mutate(id = seq_len(nrow(position))) %>%
    melt(id.vars = c("id", "V1")) %>%
    transmute(
      time_series_id = id,
      time_index = variable,
      value = value,
      class = V1
    )
  names(long)[names(long) == "value"] <- value_name
  long
}

# The three long tables are combined by row position below, so the matrices
# must have identical shapes.
stopifnot(
  identical(dim(x_position), dim(y_position)),
  identical(dim(x_position), dim(z_position))
)

x_long <- to_long_format(x_position, "X")

y_long <- to_long_format(y_position, "Y")

z_long <- to_long_format(z_position, "Z")

# Column order (time_series_id, time_index, X, Y, Z, class) is kept as before.
# time_index: strip the literal leading "V" (not "any first character") and
# shift by one so that column V2 becomes time index 1.
xyz_long <- x_long %>%
  select(-class) %>%
  mutate(
    Y = y_long$Y,
    Z = z_long$Z,
    class = x_long$class,
    time_index = as.numeric(sub("^V", "", time_index)) - 1
  )

Then, we apply PCA on X, Y and Z. This will be applied to the whole data. According to the PCA results, the standard deviation of the first component is approximately 1.27 (a variance of about 1.62) and the component is the linear combination `0.209*X + 0.723*Y + 0.658*Z`. Looking at the proportions, Comp.1 covers 54% of the variance, Comp.2 covers 36% and Comp.3 covers 10%, and together they add up to 100% of the variance. Since we would like to turn the data into a univariate time series, we will keep only Comp.1, which explains the largest share of the variance.

# PCA over all instances. cor = TRUE works on the correlation matrix so that
# scale differences between the axes do not dominate. Columns are selected by
# name rather than position, and TRUE is spelled out (T can be reassigned).
pca <- princomp(xyz_long[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca, loadings = TRUE))
FALSE Importance of components:
FALSE                           Comp.1    Comp.2    Comp.3
FALSE Standard deviation     1.2724995 1.0380930 0.5505524
FALSE Proportion of Variance 0.5397517 0.3592124 0.1010360
FALSE Cumulative Proportion  0.5397517 0.8989640 1.0000000
FALSE 
FALSE Loadings:
FALSE   Comp.1 Comp.2 Comp.3
FALSE X  0.209  0.908  0.362
FALSE Y  0.723  0.105 -0.683
FALSE Z  0.658 -0.404  0.635
# Collapse X, Y, Z into a single series by projecting onto the first principal
# component. The weights are read from the fitted model instead of being
# hand-copied as rounded literals, so they cannot drift from the PCA result.
xyz_long_uni <- xyz_long %>%
  mutate(
    value = pca$loadings["X", 1] * X +
      pca$loadings["Y", 1] * Y +
      pca$loadings["Z", 1] * Z
  ) %>%
  select(-X, -Y, -Z)

# Two example series per class (ids chosen by hand); the id becomes a factor
# so that each series gets its own discrete colour in the plots.
ex_class1 <- xyz_long_uni %>%
  filter(time_series_id %in% c(11, 17)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ex_class2 <- xyz_long_uni %>%
  filter(time_series_id %in% c(15, 20)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ex_class3 <- xyz_long_uni %>%
  filter(time_series_id %in% c(4, 13)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ex_class4 <- xyz_long_uni %>%
  filter(time_series_id %in% c(5, 8)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ex_class5 <- xyz_long_uni %>%
  filter(time_series_id %in% c(2, 3)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ex_class6 <- xyz_long_uni %>%
  filter(time_series_id %in% c(1, 10)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ex_class7 <- xyz_long_uni %>%
  filter(time_series_id %in% c(7, 12)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ex_class8 <- xyz_long_uni %>%
  filter(time_series_id %in% c(6, 21)) %>%
  mutate(time_series_id = as_factor(time_series_id))

# One line chart per gesture class, each showing the two example series
# (coloured by series id) after projection with the whole-data PCA.
ggplot(ex_class1) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 1") +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(ex_class2) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 2") +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(ex_class3) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 3") +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(ex_class4) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 4") +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(ex_class5) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 5") +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(ex_class6) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 6") +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(ex_class7) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 7") +
  theme(plot.title = element_text(hjust = 0.5))

ggplot(ex_class8) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 8") +
  theme(plot.title = element_text(hjust = 0.5))

C. PCA on Different Gesture Classes

# Class 1 only: PCA on the correlation matrix of X, Y, Z (cor = TRUE, so
# scale differences between axes do not dominate). Columns are picked by name
# and TRUE is spelled out instead of the reassignable T.
xyz_long_1 <- xyz_long %>% filter(class == 1)
pca1 <- princomp(xyz_long_1[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca1, loadings = TRUE))
FALSE Importance of components:
FALSE                           Comp.1    Comp.2    Comp.3
FALSE Standard deviation     1.2932492 1.0091473 0.5559931
FALSE Proportion of Variance 0.5574978 0.3394594 0.1030428
FALSE Cumulative Proportion  0.5574978 0.8969572 1.0000000
FALSE 
FALSE Loadings:
FALSE   Comp.1 Comp.2 Comp.3
FALSE X  0.119  0.973  0.198
FALSE Y  0.693 -0.224  0.685
FALSE Z  0.711        -0.701
# Project class 1 onto its own first principal component, reading the weights
# from the fitted model rather than hand-copied rounded values, then plot two
# example instances.
xyz_long_uni_1 <- xyz_long_1 %>%
  mutate(
    value = pca1$loadings["X", 1] * X +
      pca1$loadings["Y", 1] * Y +
      pca1$loadings["Z", 1] * Z
  ) %>%
  select(-X, -Y, -Z)
ex_class1_1 <- xyz_long_uni_1 %>%
  filter(time_series_id %in% c(11, 17)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class1_1) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 1") +
  theme(plot.title = element_text(hjust = 0.5))

# Class 2 only: PCA on the correlation matrix of X, Y, Z (cor = TRUE, so
# scale differences between axes do not dominate). Columns are picked by name
# and TRUE is spelled out instead of the reassignable T.
xyz_long_2 <- xyz_long %>% filter(class == 2)
pca2 <- princomp(xyz_long_2[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca2, loadings = TRUE))
FALSE Importance of components:
FALSE                           Comp.1    Comp.2    Comp.3
FALSE Standard deviation     1.1286537 0.9457021 0.9120243
FALSE Proportion of Variance 0.4246197 0.2981175 0.2772628
FALSE Cumulative Proportion  0.4246197 0.7227372 1.0000000
FALSE 
FALSE Loadings:
FALSE   Comp.1 Comp.2 Comp.3
FALSE X  0.535  0.814  0.224
FALSE Y  0.613 -0.192 -0.767
FALSE Z  0.581 -0.548  0.602
# Project class 2 onto its own first principal component, reading the weights
# from the fitted model rather than hand-copied rounded values, then plot two
# example instances.
xyz_long_uni_2 <- xyz_long_2 %>%
  mutate(
    value = pca2$loadings["X", 1] * X +
      pca2$loadings["Y", 1] * Y +
      pca2$loadings["Z", 1] * Z
  ) %>%
  select(-X, -Y, -Z)
ex_class2_2 <- xyz_long_uni_2 %>%
  filter(time_series_id %in% c(15, 20)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class2_2) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 2") +
  theme(plot.title = element_text(hjust = 0.5))

# Class 3 only: PCA on the correlation matrix of X, Y, Z (cor = TRUE, so
# scale differences between axes do not dominate). Columns are picked by name
# and TRUE is spelled out instead of the reassignable T.
xyz_long_3 <- xyz_long %>% filter(class == 3)
pca3 <- princomp(xyz_long_3[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca3, loadings = TRUE))
FALSE Importance of components:
FALSE                           Comp.1    Comp.2    Comp.3
FALSE Standard deviation     1.2156489 1.0027730 0.7187794
FALSE Proportion of Variance 0.4926008 0.3351846 0.1722146
FALSE Cumulative Proportion  0.4926008 0.8277854 1.0000000
FALSE 
FALSE Loadings:
FALSE   Comp.1 Comp.2 Comp.3
FALSE X  0.709         0.705
FALSE Y  0.371  0.847 -0.380
FALSE Z -0.599  0.532  0.599
# Project class 3 onto its own first principal component, reading the weights
# from the fitted model rather than hand-copied rounded values, then plot two
# example instances.
xyz_long_uni_3 <- xyz_long_3 %>%
  mutate(
    value = pca3$loadings["X", 1] * X +
      pca3$loadings["Y", 1] * Y +
      pca3$loadings["Z", 1] * Z
  ) %>%
  select(-X, -Y, -Z)
ex_class3_3 <- xyz_long_uni_3 %>%
  filter(time_series_id %in% c(4, 13)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class3_3) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 3") +
  theme(plot.title = element_text(hjust = 0.5))

# Class 4 only: PCA on the correlation matrix of X, Y, Z (cor = TRUE, so
# scale differences between axes do not dominate). Columns are picked by name
# and TRUE is spelled out instead of the reassignable T.
xyz_long_4 <- xyz_long %>% filter(class == 4)
pca4 <- princomp(xyz_long_4[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca4, loadings = TRUE))
FALSE Importance of components:
FALSE                           Comp.1    Comp.2    Comp.3
FALSE Standard deviation     1.2855781 0.9914895 0.6035210
FALSE Proportion of Variance 0.5509037 0.3276838 0.1214125
FALSE Cumulative Proportion  0.5509037 0.8785875 1.0000000
FALSE 
FALSE Loadings:
FALSE   Comp.1 Comp.2 Comp.3
FALSE X  0.702         0.711
FALSE Y  0.674  0.285 -0.681
FALSE Z -0.230  0.958  0.173
# Project class 4 onto its own first principal component, then plot two
# example instances. The weights are read from the fitted model: the
# previously hand-copied Y coefficient (0.679) did not match the fitted
# loading (0.674), which is exactly the kind of slip this avoids.
xyz_long_uni_4 <- xyz_long_4 %>%
  mutate(
    value = pca4$loadings["X", 1] * X +
      pca4$loadings["Y", 1] * Y +
      pca4$loadings["Z", 1] * Z
  ) %>%
  select(-X, -Y, -Z)
ex_class4_4 <- xyz_long_uni_4 %>%
  filter(time_series_id %in% c(5, 8)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class4_4) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 4") +
  theme(plot.title = element_text(hjust = 0.5))

# Class 5 only: PCA on the correlation matrix of X, Y, Z (cor = TRUE, so
# scale differences between axes do not dominate). Columns are picked by name
# and TRUE is spelled out instead of the reassignable T.
xyz_long_5 <- xyz_long %>% filter(class == 5)
pca5 <- princomp(xyz_long_5[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca5, loadings = TRUE))
FALSE Importance of components:
FALSE                           Comp.1    Comp.2     Comp.3
FALSE Standard deviation     1.4038307 0.9685920 0.30180949
FALSE Proportion of Variance 0.6569135 0.3127235 0.03036299
FALSE Cumulative Proportion  0.6569135 0.9696370 1.00000000
FALSE 
FALSE Loadings:
FALSE   Comp.1 Comp.2 Comp.3
FALSE X  0.245  0.970       
FALSE Y  0.685 -0.177  0.707
FALSE Z  0.686 -0.170 -0.708
# Project class 5 onto its own first principal component, reading the weights
# from the fitted model rather than hand-copied rounded values, then plot two
# example instances.
xyz_long_uni_5 <- xyz_long_5 %>%
  mutate(
    value = pca5$loadings["X", 1] * X +
      pca5$loadings["Y", 1] * Y +
      pca5$loadings["Z", 1] * Z
  ) %>%
  select(-X, -Y, -Z)
ex_class5_5 <- xyz_long_uni_5 %>%
  filter(time_series_id %in% c(2, 3)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class5_5) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 5") +
  theme(plot.title = element_text(hjust = 0.5))

# Class 6 only: PCA on the correlation matrix of X, Y, Z (cor = TRUE, so
# scale differences between axes do not dominate). Columns are picked by name
# and TRUE is spelled out instead of the reassignable T.
xyz_long_6 <- xyz_long %>% filter(class == 6)
pca6 <- princomp(xyz_long_6[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca6, loadings = TRUE))
FALSE Importance of components:
FALSE                           Comp.1    Comp.2     Comp.3
FALSE Standard deviation     1.3400570 0.9638120 0.52470332
FALSE Proportion of Variance 0.5985843 0.3096445 0.09177119
FALSE Cumulative Proportion  0.5985843 0.9082288 1.00000000
FALSE 
FALSE Loadings:
FALSE   Comp.1 Comp.2 Comp.3
FALSE X  0.291  0.955       
FALSE Y -0.671  0.251 -0.698
FALSE Z -0.682  0.162  0.713
# Project class 6 onto its own first principal component, reading the weights
# from the fitted model rather than hand-copied rounded values, then plot two
# example instances.
xyz_long_uni_6 <- xyz_long_6 %>%
  mutate(
    value = pca6$loadings["X", 1] * X +
      pca6$loadings["Y", 1] * Y +
      pca6$loadings["Z", 1] * Z
  ) %>%
  select(-X, -Y, -Z)
ex_class6_6 <- xyz_long_uni_6 %>%
  filter(time_series_id %in% c(1, 10)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class6_6) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 6") +
  theme(plot.title = element_text(hjust = 0.5))

# Class 7 only: PCA on the correlation matrix of X, Y, Z (cor = TRUE, so
# scale differences between axes do not dominate). Columns are picked by name
# and TRUE is spelled out instead of the reassignable T.
xyz_long_7 <- xyz_long %>% filter(class == 7)
pca7 <- princomp(xyz_long_7[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca7, loadings = TRUE))
FALSE Importance of components:
FALSE                           Comp.1    Comp.2    Comp.3
FALSE Standard deviation     1.2971922 0.8883869 0.7266781
FALSE Proportion of Variance 0.5609026 0.2630771 0.1760203
FALSE Cumulative Proportion  0.5609026 0.8239797 1.0000000
FALSE 
FALSE Loadings:
FALSE   Comp.1 Comp.2 Comp.3
FALSE X  0.596  0.487  0.639
FALSE Y  0.632  0.207 -0.747
FALSE Z  0.496 -0.849  0.184
# Project class 7 onto its own first principal component, reading the weights
# from the fitted model rather than hand-copied rounded values, then plot two
# example instances.
xyz_long_uni_7 <- xyz_long_7 %>%
  mutate(
    value = pca7$loadings["X", 1] * X +
      pca7$loadings["Y", 1] * Y +
      pca7$loadings["Z", 1] * Z
  ) %>%
  select(-X, -Y, -Z)
ex_class7_7 <- xyz_long_uni_7 %>%
  filter(time_series_id %in% c(7, 12)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class7_7) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 7") +
  theme(plot.title = element_text(hjust = 0.5))

# Class 8 only: PCA on the correlation matrix of X, Y, Z (cor = TRUE, so
# scale differences between axes do not dominate). Columns are picked by name
# and TRUE is spelled out instead of the reassignable T.
xyz_long_8 <- xyz_long %>% filter(class == 8)
pca8 <- princomp(xyz_long_8[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca8, loadings = TRUE))
FALSE Importance of components:
FALSE                           Comp.1    Comp.2    Comp.3
FALSE Standard deviation     1.2203138 0.9683089 0.7571078
FALSE Proportion of Variance 0.4963886 0.3125407 0.1910707
FALSE Cumulative Proportion  0.4963886 0.8089293 1.0000000
FALSE 
FALSE Loadings:
FALSE   Comp.1 Comp.2 Comp.3
FALSE X  0.664  0.253  0.704
FALSE Y  0.337 -0.941       
FALSE Z  0.668  0.223 -0.710
# Project class 8 onto its own first principal component, reading the weights
# from the fitted model rather than hand-copied rounded values, then plot two
# example instances.
xyz_long_uni_8 <- xyz_long_8 %>%
  mutate(
    value = pca8$loadings["X", 1] * X +
      pca8$loadings["Y", 1] * Y +
      pca8$loadings["Z", 1] * Z
  ) %>%
  select(-X, -Y, -Z)
ex_class8_8 <- xyz_long_uni_8 %>%
  filter(time_series_id %in% c(6, 21)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class8_8) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  labs(x = "Time Index", y = "Value", title = "2 Time Series from Class 8") +
  theme(plot.title = element_text(hjust = 0.5))

D. Visualize in the Reduced Dimensions

Reference

[1] J. Liu, Z. Wang, L. Zhong, J. Wickramasuriya, and V. Vasudevan. uWave: Accelerometer-based personalized gesture recognition and its applications. Pervasive Computing and Communications, IEEE International Conference on, 0:1-9, 2009. (link: https://www.recg.org/publications/liu09percom.pdf)